Learning Objectives:

We first zoom in on the sewing department, since it has more data available and contains no missing values. We want to explore the data from the following perspectives:

  1. How is productivity affected by each variable? (simply looking at the time series plots)

  2. Is there a team/teams that outperformed the rest? What set them apart?

  3. Is it possible that teams collaborate?

  4. What does the workflow look like?

# Load the garment-worker productivity data.
# The path is defined once so the two data frames cannot drift apart; swap in
# the commented-out alternative when running on the other machine.
data_path <- "/Users/karawei/Desktop/GRAD 3rd/ESE 527/garments_worker_productivity.csv"
#data_path <- "C:/Users/ThinkPad/Documents/GitHub/OK/worker_productivity.csv"

# `data` is the working copy that gets recoded further down in this file.
data <- read.csv(data_path)

# `data_org` starts as an identical copy of the freshly read file (R copies on
# modify, so this is equivalent to reading the CSV a second time).
data_org <- data

# Clean up the department labels.
# The raw file contains the misspelling "sweing" and a trailing-space variant
# "finishing "; both are normalised here. `%in%` is used instead of `==` so a
# missing department can never produce an NA index.
data$department[data$department %in% "sweing"] <- "sewing"
data_org$department[data_org$department %in% "finishing "] <- "finishing"
data_org$department[data_org$department %in% "sweing"] <- "sewing"

# Recode the working copy: sewing -> 1, finishing -> 2 (whole column at once).
# The column is text, so the codes are stored as "1" / "2"; comparisons such
# as `data$department == 1` still work because R coerces the number to text.
data$department[data$department %in% "sewing"] <- "1"
data$department[data$department %in% c("finishing ", "finishing")] <- "2"

# Change day of the week to a number: Monday = 1, ..., Sunday = 7.
# match() looks up the whole column at once; values that are not one of the
# seven weekday names are left untouched.
# NOTE(review): assumes read.csv left `data$day` as a character column, in
# which case the codes end up stored as "1" ... "7" (same as before).
weekday_names <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
day_code <- match(data$day, weekday_names)
is_weekday <- !is.na(day_code)
data$day[is_weekday] <- day_code[is_weekday]

# Parse the date column (month/day/four-digit-year text) into Date objects.
data$date <- as.Date(data$date, format = "%m/%d/%Y")

# Change "Quarter1" ... "Quarter5" to the numbers 1 ... 5.
# match() handles the whole column in one step; any value outside those five
# labels is left as it was.
# NOTE(review): assumes `data$quarter` is a character column, so the codes are
# stored as "1" ... "5" (same as before).
quarter_labels <- paste0("Quarter", 1:5)
quarter_code <- match(data$quarter, quarter_labels)
is_quarter <- !is.na(quarter_code)
data$quarter[is_quarter] <- quarter_code[is_quarter]

# Row filter used for every subset below: keeps the rows of `df` whose
# `column` equals `value`. `%in%` never yields NA, so rows with a missing
# value in `column` are dropped, exactly as subset() does.
rows_where <- function(df, column, value) {
  df[df[[column]] %in% value, , drop = FALSE]
}

# Split the recoded data by department (1 = sewing, 2 = finishing).
sewing <- rows_where(data, "department", 1)
finishing <- rows_where(data, "department", 2)

# One data frame per sewing team.
sewingTeam01 <- rows_where(sewing, "team", 1)
sewingTeam02 <- rows_where(sewing, "team", 2)
sewingTeam03 <- rows_where(sewing, "team", 3)
sewingTeam04 <- rows_where(sewing, "team", 4)
sewingTeam05 <- rows_where(sewing, "team", 5)
sewingTeam06 <- rows_where(sewing, "team", 6)
sewingTeam07 <- rows_where(sewing, "team", 7)
sewingTeam08 <- rows_where(sewing, "team", 8)
sewingTeam09 <- rows_where(sewing, "team", 9)
sewingTeam10 <- rows_where(sewing, "team", 10)
sewingTeam11 <- rows_where(sewing, "team", 11)
sewingTeam12 <- rows_where(sewing, "team", 12)

# One data frame per finishing team.
finishingTeam01 <- rows_where(finishing, "team", 1)
finishingTeam02 <- rows_where(finishing, "team", 2)
finishingTeam03 <- rows_where(finishing, "team", 3)
finishingTeam04 <- rows_where(finishing, "team", 4)
finishingTeam05 <- rows_where(finishing, "team", 5)
finishingTeam06 <- rows_where(finishing, "team", 6)
finishingTeam07 <- rows_where(finishing, "team", 7)
finishingTeam08 <- rows_where(finishing, "team", 8)
finishingTeam09 <- rows_where(finishing, "team", 9)
finishingTeam10 <- rows_where(finishing, "team", 10)
finishingTeam11 <- rows_where(finishing, "team", 11)
finishingTeam12 <- rows_where(finishing, "team", 12)

# Preview the copy that still carries the readable labels.
head(data_org)
##       date  quarter department      day team targeted_productivity   smv  wip
## 1 1/1/2015 Quarter1     sewing Thursday    8                  0.80 26.16 1108
## 2 1/1/2015 Quarter1  finishing Thursday    1                  0.75  3.94   NA
## 3 1/1/2015 Quarter1     sewing Thursday   11                  0.80 11.41  968
## 4 1/1/2015 Quarter1     sewing Thursday   12                  0.80 11.41  968
## 5 1/1/2015 Quarter1     sewing Thursday    6                  0.80 25.90 1170
## 6 1/1/2015 Quarter1     sewing Thursday    7                  0.80 25.90  984
##   over_time incentive idle_time idle_men no_of_style_change no_of_workers
## 1      7080        98         0        0                  0          59.0
## 2       960         0         0        0                  0           8.0
## 3      3660        50         0        0                  0          30.5
## 4      3660        50         0        0                  0          30.5
## 5      1920        50         0        0                  0          56.0
## 6      6720        38         0        0                  0          56.0
##   actual_productivity
## 1           0.9407254
## 2           0.8865000
## 3           0.8005705
## 4           0.8005705
## 5           0.8003819
## 6           0.8001250

We first recode all the categorical variables as numbers. Because the dataset only has wip values for the sewing department, we consider whether we should split the dataset into two subsets - the finishing department and the sewing department.

Histograms of all numerical variables:

# par(mfrow = c(3, 4))
# hist(data[["actual_productivity"]],main="Productivity Index",xlab="Productivity Index")
# hist(data[["targeted_productivity"]],main="Target Productivity",xlab="Productivity Index")
# hist(data[["smv"]],main="Standard Minute Value",xlab="Minutes Needed for Task")
# hist(data[["over_time"]],main="Over Time",xlab="Minutes of Over Time")
# hist(data[["wip"]],main="Work in Progress",xlab="No. of Unfinished Products")
# hist(data[["incentive"]],main="Financial Incentive",xlab="Bangladash Taka(BDT)",breaks=20)
# hist(data[["idle_time"]],main="Idle Time",xlab="Interrupted Time")
# hist(data[["idle_men"]],main="Idle Workers",xlab="No. of Idle Workers When Production was Interrupted")
# hist(data[["no_of_style_change"]],main="No. of Style Change",xlab="Number of Style Change")
# hist(data[["no_of_workers"]],main="No. of Workers/Team",xlab="Number of Workers/Team")
# Histograms of the ten numeric variables on a shared 3 x 4 panel grid.
# Panel titles are left blank; the x-axis label names each variable.
par(mfrow = c(3, 4))
hist(data[["actual_productivity"]], xlab = "Actual Productivity", main = "")
hist(data[["targeted_productivity"]], xlab = "Targeted Productivity", main = "")
hist(data[["smv"]], xlab = "Minutes Needed for Task", main = "")
hist(data[["over_time"]], xlab = "Minutes of Over Time", main = "")
hist(data[["wip"]], xlab = "No. of Unfinished Products", main = "")
# Incentive is the only panel with an explicit bin count.
hist(data[["incentive"]], xlab = "Financial Incentive", breaks = 20, main = "")
hist(data[["idle_time"]], xlab = "Interrupted Time", main = "")
hist(data[["idle_men"]], xlab = "Interrupted Workers", main = "")
hist(data[["no_of_style_change"]], xlab = "Number of Style Change", main = "")
hist(data[["no_of_workers"]], xlab = "Number of Workers/Team", main = "")
# mtext() writes relative to the last panel drawn; line = 25 pushes the text
# far above that panel so it acts as an overall heading.
# NOTE(review): an outer margin (par(oma = ...)) with outer = TRUE would be a
# layout-independent way to do this - confirm the rendering before switching.
mtext("Histogram Subplots", side = 3, line = 25, cex = 1.5)

We want to understand the overall data structure and hope to catch any patterns, so we draw the frequency distribution of every numerical variable. From the histograms, we notice that several variables have long tails: targeted productivity, overtime, financial incentive, etc.

We draw box plots for all variables in original dataset:

# par(mfrow = c(3, 4))
# boxplot(data[["actual_productivity"]],main="Productivity Index",xlab="Productivity Index")
# boxplot(data[["targeted_productivity"]],main="Target Productivity",xlab="Productivity Index")
# boxplot(data[["smv"]],main="Standard Minute Value",xlab="Minutes Needed for Task")
# boxplot(data[["over_time"]],main="Over Time",xlab="Minutes of Over Time")
# boxplot(data[["wip"]],main="Work in Progress",xlab="Number of Unfinished Products")
# boxplot(data[["incentive"]],main="Financial Incentive",xlab="Bangladash Taka(BDT)",breaks=200)
# boxplot(data[["idle_time"]],main="Idle Time",xlab="Amount of Time When Production was Interrupted")
# boxplot(data[["idle_men"]],main="Idle Workers",xlab="Number of Idle Workers When Production was Interrupted")
# boxplot(data[["no_of_style_change"]],main="Number of Style Change",xlab="Number of Style Change")
# boxplot(data[["no_of_workers"]],main="Number of Workers in a Team",xlab="Number of Workers in a Team")

# Box plots of the ten numeric variables on a shared 3 x 4 panel grid.
# Panel titles are left blank; the x-axis label names each variable.
par(mfrow = c(3, 4))
boxplot(data[["actual_productivity"]], main = "", xlab = "Actual Productivity")
boxplot(data[["targeted_productivity"]], main = "", xlab = "Targeted Productivity")
boxplot(data[["smv"]], main = "", xlab = "Minutes Needed for Task")
boxplot(data[["over_time"]], main = "", xlab = "Minutes of Over Time")
boxplot(data[["wip"]], main = "", xlab = "No. of Unfinished Products")
# `breaks` is a histogram argument and has no meaning for a box plot, so it is
# no longer passed here.
boxplot(data[["incentive"]], main = "", xlab = "Financial Incentive")
boxplot(data[["idle_time"]], main = "", xlab = "Interrupted Time")
boxplot(data[["idle_men"]], main = "", xlab = "Interrupted Workers")
boxplot(data[["no_of_style_change"]], main = "", xlab = "No. of Style Change")
boxplot(data[["no_of_workers"]], main = "", xlab = "No. of Workers/Team")
# mtext() writes relative to the last panel drawn; line = 25 pushes the text
# far above that panel so it acts as an overall heading.
mtext("Boxplot Subplots", side = 3, line = 25, cex = 1.5)

We further investigate the distributions via box plots. From the box plots, we see that most of the variables have outliers. This confirms what we see from the histograms - heavy tails for most variables.

We create correlation map for numerical variables above:

To make sure we fit the right model, and knowing that time series variables are generally correlated, we compute the correlation matrix. We define high correlation as $|\mathrm{Cor}(X,Y)| > 0.7$ for $X, Y \in \{\text{numerical variables}\}$.

# Pairwise correlations of columns 6 to 15 of `data`, drawn with ggcorrplot.
correlation_matrix <- cor(data[6:15])
ggcorrplot(correlation_matrix, lab = TRUE)

# The hand-built ggplot2 version below uses the same matrix; it is reused
# rather than recomputed and kept under its original name.
test_correlation_matrix <- correlation_matrix

# Long format: one row per (Var1, Var2) pair with the correlation in `Freq`.
T_C_df <- as.data.frame(as.table(test_correlation_matrix))

# Heat map: one tile per pair, diverging blue-white-red fill.
p <- ggplot(T_C_df, aes(Var1, Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue3", mid = "white", high = "red3") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 80, hjust = 1)) +
  labs(title = "Correlation Matrix for Numerical Variables", fill = "Corr")

# Print each correlation (two decimals) on its tile. `vjust` is a fixed
# setting, so it sits outside aes(); tiles with an NA correlation get no
# label, which is what the "Removed ... rows" warning reports.
p +
  geom_text(aes(label = round(Freq, 2)), vjust = 1, color = "black", size = 3) +
  scale_x_discrete(name = "Variable Names") +
  scale_y_discrete(name = "Variable Names")
## Warning: Removed 18 rows containing missing values (`geom_text()`).

Attempt to run correlation test and remove correlated features

# Screen columns 7 to 15 of `data` with cor.test().
# Every ordered pair is tested, so each column is also paired with itself and
# each pair shows up in both orders. Pairs with a p-value below 0.05 are kept.

significant_correlations <- list()
candidate_cols <- 7:15

for (i in candidate_cols) {
  for (j in candidate_cols) {
    correlation_test <- cor.test(data[[i]], data[[j]])

    # Not significant at the 5% level: move on to the next pair.
    if (correlation_test$p.value >= 0.05) next

    # Record the pair by column name together with its estimate and p-value.
    correlation_result <- list(
      Feature1 = colnames(data)[i],
      Feature2 = colnames(data)[j],
      Correlation = correlation_test$estimate,
      P_Value = correlation_test$p.value
    )
    next_slot <- length(significant_correlations) + 1L
    significant_correlations[[next_slot]] <- correlation_result
  }
}

# Stack the kept results row-wise and show the first few.
correlation_df <- do.call(rbind, significant_correlations)
print(head(correlation_df))
##      Feature1 Feature2             Correlation P_Value     
## [1,] "smv"    "smv"                1           0           
## [2,] "smv"    "over_time"          0.6748874   6.38377e-160
## [3,] "smv"    "idle_time"          0.05686278  0.04919971  
## [4,] "smv"    "idle_men"           0.1059007   0.0002421556
## [5,] "smv"    "no_of_style_change" 0.3153875   4.720285e-29
## [6,] "smv"    "no_of_workers"      0.9121763   0

From the correlation matrix, we observe that smv and no_of_workers might be highly correlated (correlation = 0.91). Also, over_time and no_of_workers might be highly correlated (correlation = 0.73). We also observe that wip has missing correlations with every numerical variable but itself. After reviewing the distribution of wip, we realize that wip only has values when the department is sewing.

Naturally, We break the dataset into two parts based on the department they belong and revisit the collinearity issue.

Create pie chart for department to see the distribution:

We first look at the proportion of data by department.

# Share of rows per department, shown as a pie chart with percentage labels.
labels_of_departments <- c("sewing", "finishing")

# Count the rows of each department. sum() over the comparison counts the
# matches, and na.rm = TRUE keeps a missing department from being counted.
counts_of_departments <- vapply(
  labels_of_departments,
  function(dept) sum(data_org$department == dept, na.rm = TRUE),
  integer(1),
  USE.NAMES = FALSE
)

# Slice labels of the form "<department>, <percent>%", rounded to 2 decimals.
departments_pie_labels <- paste0(
  labels_of_departments, ", ",
  round(100 * counts_of_departments / sum(counts_of_departments), 2), "%"
)

# counts_of_quarters = c(length(data_prod$quarter[data_prod$quarter=="Quarter1"]), length(data_prod$quarter[data_prod$quarter=="Quarter2"]),length(data_prod$quarter[data_prod$quarter=="Quarter3"]), length(data_prod$quarter[data_prod$quarter=="Quarter4"]),length(data_prod$quarter[data_prod$quarter=="Quarter5"]))
# labels_of_quarters = c("Quarter1", "Quarter2","Quarter3","Quarter4","Quarter5")
# quarters_pie_labels <- paste0(labels_of_quarters, ", ", round(100 * counts_of_quarters/sum(counts_of_quarters), 2), "%")

# counts_of_days = c(length(data_prod$day[data_prod$day=="Monday"]),
# length(data_prod$day[data_prod$day=="Tuesday"]),
# length(data_prod$day[data_prod$day=="Wednesday"]),
# length(data_prod$day[data_prod$day=="Thursday"]),
# length(data_prod$day[data_prod$day=="Friday"]),
# length(data_prod$day[data_prod$day=="Saturday"]),
# length(data_prod$day[data_prod$day=="Sunday"]))
# labels_of_days = c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday")
# days_pie_labels <- paste0(labels_of_days, ", ", round(100 * counts_of_days/sum(counts_of_days), 2), "%")
pie(
  counts_of_departments,
  labels = departments_pie_labels,
  main = "Pie Chart of Departments",
  col = rainbow(length(counts_of_departments))
)

#pie(counts_of_quarters, labels = quarters_pie_labels,main = "Pie Chart of Quarters",col = rainbow(length(counts_of_quarters)))
#pie(counts_of_days, labels = days_pie_labels,main = "Pie Chart of Days",col = rainbow(length(counts_of_days)))

We see that sewing department has more corresponding rows than finishing.

Distribution of variables by department via box plots.

# ggplot(gather(data[,-c(1:5)]), aes(key,value)) +
# geom_boxplot() +
# facet_wrap(~key, scales = 'free') +
# labs(title = "Boxplots of all numerical variables")+
#   theme_minimal()
# Box plots of every numeric variable, split by department.
# `team` is dropped by name so that melt() does not treat the team number as a
# measured variable; all other columns are kept.
data_no_team <- data_org[, names(data_org) != "team", drop = FALSE]

# melt() is called without id.vars, so it picks the non-numeric columns as id
# variables itself (it reports which ones it used). Each remaining variable
# gets its own facet with a free scale.
ggplot(melt(data_no_team), aes(x = department, y = value)) +
  facet_wrap(~variable, scales = "free") +
  geom_boxplot() +
  labs(title = "Boxplots of All Numerical Variables by Departments") +
  theme_minimal() +
  scale_x_discrete(name = "Departments")
## Using date, quarter, department, day as id variables
## Warning: Removed 506 rows containing non-finite values (`stat_boxplot()`).

If we zoom in on the box plots per department, the sewing department has a higher mean for most of the numerical variables. For most of the numerical variables - targeted_productivity, smv, wip, idle_time, idle_men, no_of_style_change, no_of_workers, and actual_productivity - the sewing department has higher outliers. This once again supports the idea that the distributions differ between departments. Thus, we should analyze the two departments separately.

For sewing department:

# Correlations among columns 6 to 14 of the sewing subset, reshaped to long
# format (one row per variable pair, correlation in `Freq`).
sewing_correlation_matrix <- cor(sewing[6:14])
S_C_df <- as.data.frame(as.table(sewing_correlation_matrix))

# Heat map with one tile per variable pair, filled blue (low) to red (high).
p <- ggplot(S_C_df, aes(Var1, Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(title = "Correlation Plot") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 80, hjust = 1))

# Print the correlation, rounded to one decimal, on each tile.
p +
  geom_text(
    aes(label = round(Freq, 1), vjust = 1),
    color = "black",
    size = 3
  )

#ggcorrplot(sewing_correlation_matrix,lab = TRUE)

^Not sure why but apparently most of them are correlated in this sense….

We conclude that within the sewing department, we do not see high correlation between variables based on the absolute correlation values. Thus, we assume for now that all variables are independent within the sewing department.

For finishing department:

# Correlation matrix for the finishing subset using columns 6:7 and 9:14;
# column 8 is left out (the head() preview below shows the selection has no
# wip column). The recorded warning notes that some selected columns have
# zero standard deviation.
finishing_correlation_matrix <- cor(finishing[,c(6:7,9:14)])
## Warning in cor(finishing[, c(6:7, 9:14)]): the standard deviation is zero
ggcorrplot(finishing_correlation_matrix,lab = TRUE)

# Preview the columns that went into the correlation matrix.
head(finishing[,c(6:7,9:14)])
##    targeted_productivity  smv over_time incentive idle_time idle_men
## 2                   0.75 3.94       960         0         0        0
## 7                   0.75 3.94       960         0         0        0
## 14                  0.65 3.94       960         0         0        0
## 15                  0.75 2.90       960         0         0        0
## 16                  0.75 3.94      2160         0         0        0
## 17                  0.80 2.90       960         0         0        0
##    no_of_style_change no_of_workers
## 2                   0             8
## 7                   0             8
## 14                  0             8
## 15                  0             8
## 16                  0            18
## 17                  0             8
# Look for finishing rows with a non-zero idle_time, idle_men or
# no_of_style_change. Each call indexes `finishing` with a one-column logical
# mask; the recorded result is character(0) every time, i.e. nothing was
# selected for any of the three columns.
finishing[finishing["idle_time"]!=0]
## character(0)
finishing[finishing["idle_men"]!=0]
## character(0)
finishing[finishing["no_of_style_change"]!=0]
## character(0)

Notice that many of the correlations involving idle_time, idle_men, and no_of_style_change are missing here. That’s because these variables are 0 for every observation, so their standard deviation is zero and the correlation is \(\frac{0}{0}\).

Take covariance between productivity in team finishing and team sewing:

Then we wonder whether teams in one department might collaborate. If their productivities are correlated, then it’s possible that some teams help the others.

# Per-team productivity tables, one CSV per department.
finishing_file_path <- "/Users/karawei/Documents/GitHub/OK/combinedProductivityDataforFinishingTeams.csv"
sewing_file_path <- "/Users/karawei/Documents/GitHub/OK/combinedDataforProductivity.csv"
#finishing_file_path <-"C:/Users/ThinkPad/Documents/GitHub/OK/combinedProductivityDataforFinishingTeams.csv"
#sewing_file_path <-"C:/Users/ThinkPad/Documents/GitHub/OK/combinedDataforProductivity.csv"

# Read the CSV files into data frames
finishing_data <- read.csv(finishing_file_path)
sewing_data <- read.csv(sewing_file_path)

# Correlations between team productivity columns, using only the rows that
# are complete across the selected columns (use = "complete.obs").
# Finishing uses columns 2:12 and sewing uses columns 2:13.
# NOTE(review): presumably column 1 is the date and the rest are one column
# per team, so 2:12 would leave out finishing team 12 - confirm against the
# CSV headers.
finishing_correlation_matrix <- cor(finishing_data[2:12], use = "complete.obs")
sewing_correlation_matrix <- cor(sewing_data[2:13], use = "complete.obs")

# `lab` is a logical switch, so pass TRUE rather than the string "true".
ggcorrplot(finishing_correlation_matrix, lab = TRUE)

ggcorrplot(sewing_correlation_matrix, lab = TRUE)

# Long-format versions of the two team-productivity correlation matrices
# (one row per team pair, correlation in `Freq`).
F_C_df <- as.data.frame(as.table(finishing_correlation_matrix))
S_C_df <- as.data.frame(as.table(sewing_correlation_matrix))

# Builds the unlabelled heat map shared by both departments: one tile per
# pair, filled blue (low) to red (high), with slanted x-axis labels.
correlation_tiles <- function(corr_long, plot_title) {
  ggplot(corr_long, aes(Var1, Var2, fill = Freq)) +
    geom_tile() +
    scale_fill_gradient(low = "blue", high = "red") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 80, hjust = 1)) +
    labs(title = plot_title)
}

# Sewing teams: heat map plus the correlation (one decimal) on each tile.
p <- correlation_tiles(S_C_df, "Correlation Plot sewing productivity")
p +
  geom_text(
    aes(label = round(Freq, 1), vjust = 1),
    color = "black",
    size = 3
  )

# Finishing teams: same plot for the finishing correlations.
p <- correlation_tiles(F_C_df, "Correlation Plot finishing productivity")
p +
  geom_text(
    aes(label = round(Freq, 1), vjust = 1),
    color = "black",
    size = 3
  )

# Drop every row that has an NA in columns 2:13 of the sewing table and count
# what is left.
sewing_remove <- na.omit(sewing_data[2:13])
nrow(sewing_remove)
## [1] 44
# Same for the finishing table, here across all of its columns.
finishing_remove <- na.omit(finishing_data)
nrow(finishing_remove)
## [1] 1
# Show the remaining finishing rows.
head(finishing_remove)
##          date team1productivityFinishing team2productivityFinishing
## 26 01/31/2015                  0.9718667                  0.9718667
##    team3productivityFinishing team4productivityFinishing
## 26                  0.9718667                  0.9718667
##    team5productivityFinishing team6productivityFinishing
## 26                  0.9718667                  0.9718667
##    team7productivityFinishing team8productivityFinishing
## 26                  0.9718667                  0.9718667
##    team9productivityFinishing team10productivityFinishing
## 26                  0.9718667                   0.9718667
##    team11productivityFinishing team12productivityFinishing
## 26                   0.9718667                   0.9718667

We first combine the productivity of all teams into two datasets - one for the finishing department and one for the sewing department. Because the teams have different numbers of observations (some teams work on more dates than others), we remove all rows with NA values. We are able to compute the correlation matrix for team 1 - team 11 of the finishing department, and we observe that the finishing department has high correlation between almost all teams. We also compute the correlation matrix for team 1 - team 12 of the sewing department. The sewing department has no high correlation between teams. However, there are questions:

  1. After we remove all missing values, the correlation might not be too reliable

  2. I can’t compute the correlation for team12productivityFinishing because there are too many missing values. -> In fact, only one row (row 26, on date 01/31/2015) has complete productivity values for all teams.

  3. I am not sure if calculating the productivity would mean anything statistically. Because we have not yet established relationship between productivity and other variables. And teams collaborate during the manufacturing process instead of at the final productivity. -> sol: But it shows productivity between teams might not be independent.

# ggplot(melt(data_org),aes(x=department)) +
# facet_wrap(~variable, scales="free") +
# geom_bar()+
#   labs(title = "Bar charts of all numerical variables by departments")+
#   theme_minimal()

Checking outliers for departments

From both the histograms and boxplots, we realize there are outliers for the sewing department. To deal with such problems, we

tinna

#Incentive

For each team within a department, we want to know whether changes in productivity over time can also be observed in the other variables, i.e. we investigate the patterns of productivity vs. each variable.

# Sewing team 1: actual productivity over time, overlaid with the other
# recorded variables. Series divided by their own maximum are rescaled to fit
# on the productivity axis. idle_time and idle_men are plotted unscaled: the
# max checks further down in this file show their maximum is 0 for this team,
# so dividing by it would give NaN.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time, color = "Idle_time")) +
  geom_line(aes(y = idle_men, color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam1 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 2: actual productivity over time, overlaid with the other
# recorded variables. Every series except the two productivity series is
# divided by its own maximum so that it fits on the productivity axis.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam02, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam2 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 3: actual productivity over time, overlaid with the other
# recorded variables. Series divided by their own maximum are rescaled to fit
# on the productivity axis. idle_time and idle_men are plotted unscaled: the
# max checks further down in this file show their maximum is 0 for this team,
# so dividing by it would give NaN.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam03, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time, color = "Idle_time")) +
  geom_line(aes(y = idle_men, color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam3 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 4: actual productivity over time, overlaid with the other
# recorded variables. Every series except the two productivity series is
# divided by its own maximum so that it fits on the productivity axis.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam04, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam4 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 5: actual productivity over time, overlaid with the other
# recorded variables. Every series except the two productivity series is
# divided by its own maximum so that it fits on the productivity axis.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam05, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam5 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 6: actual productivity over time, overlaid with the other
# recorded variables. Series divided by their own maximum are rescaled to fit
# on the productivity axis. idle_time and idle_men are plotted unscaled: the
# max checks further down in this file show their maximum is 0 for this team,
# so dividing by it would give NaN.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam06, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time, color = "Idle_time")) +
  geom_line(aes(y = idle_men, color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam6 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 7: actual productivity over time, overlaid with the other
# recorded variables. Every series except the two productivity series is
# divided by its own maximum so that it fits on the productivity axis.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam07, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam7 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 8: actual productivity over time, overlaid with the other
# recorded variables. Every series except the two productivity series is
# divided by its own maximum so that it fits on the productivity axis.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam08, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam8 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 9: actual productivity over time, overlaid with the other
# recorded variables. Series divided by their own maximum are rescaled to fit
# on the productivity axis. idle_time and idle_men are plotted unscaled: the
# max checks further down in this file show their maximum is 0 for this team,
# so dividing by it would give NaN.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam09, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time, color = "Idle_time")) +
  geom_line(aes(y = idle_men, color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam9 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 10: actual productivity over time, overlaid with the other
# recorded variables. Every series except the two productivity series is
# divided by its own maximum so that it fits on the productivity axis.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam10, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam10 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 11: actual productivity over time, overlaid with the other
# recorded variables. Every series except the two productivity series is
# divided by its own maximum so that it fits on the productivity axis.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam11, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam11 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 12: actual productivity over time, overlaid with the other
# recorded variables. Series divided by their own maximum are rescaled to fit
# on the productivity axis. idle_time and idle_men are plotted unscaled: the
# max checks further down in this file show their maximum is 0 for this team,
# so dividing by it would give NaN.
# Columns are referenced by bare name inside aes() (looked up in `data`),
# avoiding the `data$column` form that ggplot2 discourages.
ggplot(data = sewingTeam12, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time, color = "Idle_time")) +
  geom_line(aes(y = idle_men, color = "Idle_Worker")) +
  # NOTE(review): no_of_style_change is plotted unscaled for this team even
  # though its legend key says "Normalized" - presumably its maximum is 0 here
  # as well; TODO confirm.
  geom_line(aes(
    y = no_of_style_change,
    color = "Normalized No. of style Change"
  )) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = "Scale as Productivity",
    title = "SewingTeam12 Productivity vs Other Variables"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black", "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue", "Normalized SMV" = "green",
    "Normalized WIP" = "purple", "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan", "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# For each sewing team, print the idle_time entries that equal that team's
# maximum idle_time. The `##` lines are the recorded console output.
# When every printed value is 0 (teams 1, 3, 6, 9 and 12), the team's maximum
# idle_time is 0, so idle_time / max(idle_time) would be 0 / 0 = NaN.
sewingTeam01$idle_time[sewingTeam01$idle_time == max(sewingTeam01$idle_time)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam02$idle_time[sewingTeam02$idle_time == max(sewingTeam02$idle_time)]
## [1] 6.5
sewingTeam03$idle_time[sewingTeam03$idle_time == max(sewingTeam03$idle_time)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam04$idle_time[sewingTeam04$idle_time == max(sewingTeam04$idle_time)]
## [1] 150
sewingTeam05$idle_time[sewingTeam05$idle_time == max(sewingTeam05$idle_time)]
## [1] 90
sewingTeam06$idle_time[sewingTeam06$idle_time == max(sewingTeam06$idle_time)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam07$idle_time[sewingTeam07$idle_time == max(sewingTeam07$idle_time)]
## [1] 270
sewingTeam08$idle_time[sewingTeam08$idle_time == max(sewingTeam08$idle_time)]
## [1] 300
sewingTeam09$idle_time[sewingTeam09$idle_time == max(sewingTeam09$idle_time)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam10$idle_time[sewingTeam10$idle_time == max(sewingTeam10$idle_time)]
## [1] 8 8
sewingTeam11$idle_time[sewingTeam11$idle_time == max(sewingTeam11$idle_time)]
## [1] 4
sewingTeam12$idle_time[sewingTeam12$idle_time == max(sewingTeam12$idle_time)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Same check for idle_men: for each sewing team, print the idle_men entries
# that equal that team's maximum. The `##` lines are the recorded console
# output. All-zero output (teams 1, 3, 6, 9 and 12) means the team's maximum
# idle_men is 0, so idle_men / max(idle_men) would be 0 / 0 = NaN.
sewingTeam01$idle_men[sewingTeam01$idle_men == max(sewingTeam01$idle_men)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam02$idle_men[sewingTeam02$idle_men == max(sewingTeam02$idle_men)]
## [1] 30
sewingTeam03$idle_men[sewingTeam03$idle_men == max(sewingTeam03$idle_men)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam04$idle_men[sewingTeam04$idle_men == max(sewingTeam04$idle_men)]
## [1] 15
sewingTeam05$idle_men[sewingTeam05$idle_men == max(sewingTeam05$idle_men)]
## [1] 25
sewingTeam06$idle_men[sewingTeam06$idle_men == max(sewingTeam06$idle_men)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam07$idle_men[sewingTeam07$idle_men == max(sewingTeam07$idle_men)]
## [1] 45
sewingTeam08$idle_men[sewingTeam08$idle_men == max(sewingTeam08$idle_men)]
## [1] 37
sewingTeam09$idle_men[sewingTeam09$idle_men == max(sewingTeam09$idle_men)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam10$idle_men[sewingTeam10$idle_men == max(sewingTeam10$idle_men)]
## [1] 35 35
sewingTeam11$idle_men[sewingTeam11$idle_men == max(sewingTeam11$idle_men)]
## [1] 20
sewingTeam12$idle_men[sewingTeam12$idle_men == max(sewingTeam12$idle_men)]
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

# ggplot(data = sewingTeam01, aes(x = sewingTeam01$date)) +
#   geom_line(aes(y= sewingTeam01$actual_productivity,color="Actual Productivity"))+
#   geom_line(aes(y = sewingTeam01$incentive/max(sewingTeam01$incentive), color = "Normalized Incentive"))+
#   labs(x="Date", y = NA, title = "SewingTeam1 Productivity vs Targeted Normalized Incentive")+
#   scale_color_manual(values = c("Actual Productivity" = "black","Normalized Incentive" = "red")) +
#   theme_minimal() +
#   theme(legend.position = "top")

#Target Productivity ######################################################################################

# Sewing team 1, one plot per variable: actual productivity over time against
# a single other series. Columns are referenced by bare name inside aes()
# (looked up in `data`), avoiding the `data$column` form that ggplot2
# discourages.
# NOTE(review): several plots below pass `y = NA` to labs(); if the intent was
# to drop the y-axis title, `y = NULL` is the documented way - confirm before
# changing, since it may alter the rendered figure.

# Targeted productivity ----
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  labs(
    x = "Date",
    y = "Productivity",
    title = "SewingTeam1 Productivity vs Targeted Productivity"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Targeted Productivity" = "blue"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# SMV (divided by its maximum) ----
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  labs(
    x = "Date",
    y = NA,
    title = "SewingTeam1 Productivity vs Normalized SMV"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized SMV" = "green"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# WIP (divided by its maximum) ----
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  labs(
    x = "Date",
    y = NA,
    title = "SewingTeam1 Productivity vs Normalized WIP"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized WIP" = "purple"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Overtime (divided by its maximum) ----
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  labs(
    x = "Date",
    y = NA,
    title = "SewingTeam1 Productivity vs Normalized Overtime"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized Overtime" = "magenta"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Idle time (unscaled) ----
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = idle_time, color = "Idle_time")) +
  labs(
    x = "Date",
    y = NA,
    title = "SewingTeam1 Productivity vs Idle_time"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Idle_time" = "cyan"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Idle workers (unscaled) ----
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = idle_men, color = "Idle_Worker")) +
  labs(
    x = "Date",
    y = "Productivity",
    title = "SewingTeam1 Productivity vs Idle_Worker"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Idle_Worker" = "brown"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Number of style changes (divided by its maximum) ----
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(
    y = no_of_style_change / max(no_of_style_change),
    color = "Normalized No. of style Change"
  )) +
  labs(
    x = "Date",
    y = "Productivity",
    title = "SewingTeam1 Productivity vs Normalized No. of style Change"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized No. of style Change" = "pink"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Number of workers (divided by its maximum) ----
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(
    y = no_of_workers / max(no_of_workers),
    color = "Normalized No. of workers"
  )) +
  labs(
    x = "Date",
    y = NA,
    title = "SewingTeam1 Productivity vs Normalized No. of workers"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")

# Sewing team 1: actual productivity vs incentive (divided by its maximum so
# it fits on the productivity axis). The legend key previously read
# "Normalized No. of workers" although the series plotted is incentive; it now
# matches the plot title. The legend title is suppressed via guides().
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  labs(
    x = "Date",
    y = "Productivity",
    title = "SewingTeam1 Productivity vs Normalized Incentive"
  ) +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized Incentive" = "red"
  )) +
  theme_minimal() +
  theme(legend.position = "top") +
  guides(color = guide_legend(title = NULL))

Calculating the mean of each team variable

# Per-team summary statistics for the sewing department: mean and variance of
# actual productivity, plus the mean of no_of_workers, targeted_productivity,
# smv, wip, incentive and over_time. The scalar names (including the existing
# "Insentive" spelling) are kept unchanged because they are referenced when the
# per-team vectors are assembled further down in this file.

# Team 01
mean01 <- mean(sewingTeam01$actual_productivity)
var01 <- var(sewingTeam01$actual_productivity)
meanWorker01 <- mean(sewingTeam01$no_of_workers)
meanTP01 <- mean(sewingTeam01$targeted_productivity)
meanSMV01 <- mean(sewingTeam01$smv)
meanWIP01 <- mean(sewingTeam01$wip)
meanInsentive01 <- mean(sewingTeam01$incentive)
meanOvertime01 <- mean(sewingTeam01$over_time)

# Team 02
mean02 <- mean(sewingTeam02$actual_productivity)
var02 <- var(sewingTeam02$actual_productivity)
meanWorker02 <- mean(sewingTeam02$no_of_workers)
meanTP02 <- mean(sewingTeam02$targeted_productivity)
meanSMV02 <- mean(sewingTeam02$smv)
meanWIP02 <- mean(sewingTeam02$wip)
meanInsentive02 <- mean(sewingTeam02$incentive)
meanOvertime02 <- mean(sewingTeam02$over_time)

# Team 03
mean03 <- mean(sewingTeam03$actual_productivity)
var03 <- var(sewingTeam03$actual_productivity)
meanWorker03 <- mean(sewingTeam03$no_of_workers)
meanTP03 <- mean(sewingTeam03$targeted_productivity)
meanSMV03 <- mean(sewingTeam03$smv)
meanWIP03 <- mean(sewingTeam03$wip)
meanInsentive03 <- mean(sewingTeam03$incentive)
meanOvertime03 <- mean(sewingTeam03$over_time)

# Team 04
mean04 <- mean(sewingTeam04$actual_productivity)
var04 <- var(sewingTeam04$actual_productivity)
meanWorker04 <- mean(sewingTeam04$no_of_workers)
meanTP04 <- mean(sewingTeam04$targeted_productivity)
meanSMV04 <- mean(sewingTeam04$smv)
meanWIP04 <- mean(sewingTeam04$wip)
meanInsentive04 <- mean(sewingTeam04$incentive)
meanOvertime04 <- mean(sewingTeam04$over_time)

# Team 05
mean05 <- mean(sewingTeam05$actual_productivity)
var05 <- var(sewingTeam05$actual_productivity)
meanWorker05 <- mean(sewingTeam05$no_of_workers)
meanTP05 <- mean(sewingTeam05$targeted_productivity)
meanSMV05 <- mean(sewingTeam05$smv)
meanWIP05 <- mean(sewingTeam05$wip)
meanInsentive05 <- mean(sewingTeam05$incentive)
meanOvertime05 <- mean(sewingTeam05$over_time)

# Team 06
mean06 <- mean(sewingTeam06$actual_productivity)
var06 <- var(sewingTeam06$actual_productivity)
meanWorker06 <- mean(sewingTeam06$no_of_workers)
meanTP06 <- mean(sewingTeam06$targeted_productivity)
# Fixed: this was computed from sewingTeam05 (copy-paste slip), which made
# team 6's mean SMV a duplicate of team 5's.
meanSMV06 <- mean(sewingTeam06$smv)
meanWIP06 <- mean(sewingTeam06$wip)
meanInsentive06 <- mean(sewingTeam06$incentive)
meanOvertime06 <- mean(sewingTeam06$over_time)

# Team 07
mean07 <- mean(sewingTeam07$actual_productivity)
var07 <- var(sewingTeam07$actual_productivity)
meanWorker07 <- mean(sewingTeam07$no_of_workers)
meanTP07 <- mean(sewingTeam07$targeted_productivity)
meanSMV07 <- mean(sewingTeam07$smv)
meanWIP07 <- mean(sewingTeam07$wip)
meanInsentive07 <- mean(sewingTeam07$incentive)
meanOvertime07 <- mean(sewingTeam07$over_time)

# Team 08
mean08 <- mean(sewingTeam08$actual_productivity)
var08 <- var(sewingTeam08$actual_productivity)
meanWorker08 <- mean(sewingTeam08$no_of_workers)
meanTP08 <- mean(sewingTeam08$targeted_productivity)
meanSMV08 <- mean(sewingTeam08$smv)
meanWIP08 <- mean(sewingTeam08$wip)
meanInsentive08 <- mean(sewingTeam08$incentive)
meanOvertime08 <- mean(sewingTeam08$over_time)

# Team 09
mean09 <- mean(sewingTeam09$actual_productivity)
var09 <- var(sewingTeam09$actual_productivity)
meanWorker09 <- mean(sewingTeam09$no_of_workers)
meanTP09 <- mean(sewingTeam09$targeted_productivity)
meanSMV09 <- mean(sewingTeam09$smv)
meanWIP09 <- mean(sewingTeam09$wip)
meanInsentive09 <- mean(sewingTeam09$incentive)
meanOvertime09 <- mean(sewingTeam09$over_time)

# Team 10
mean10 <- mean(sewingTeam10$actual_productivity)
var10 <- var(sewingTeam10$actual_productivity)
meanWorker10 <- mean(sewingTeam10$no_of_workers)
meanTP10 <- mean(sewingTeam10$targeted_productivity)
meanSMV10 <- mean(sewingTeam10$smv)
meanWIP10 <- mean(sewingTeam10$wip)
meanInsentive10 <- mean(sewingTeam10$incentive)
meanOvertime10 <- mean(sewingTeam10$over_time)

# Team 11
mean11 <- mean(sewingTeam11$actual_productivity)
var11 <- var(sewingTeam11$actual_productivity)
meanWorker11 <- mean(sewingTeam11$no_of_workers)
meanTP11 <- mean(sewingTeam11$targeted_productivity)
meanSMV11 <- mean(sewingTeam11$smv)
meanWIP11 <- mean(sewingTeam11$wip)
meanInsentive11 <- mean(sewingTeam11$incentive)
meanOvertime11 <- mean(sewingTeam11$over_time)

# Team 12
mean12 <- mean(sewingTeam12$actual_productivity)
var12 <- var(sewingTeam12$actual_productivity)
meanWorker12 <- mean(sewingTeam12$no_of_workers)
meanTP12 <- mean(sewingTeam12$targeted_productivity)
meanSMV12 <- mean(sewingTeam12$smv)
meanWIP12 <- mean(sewingTeam12$wip)
meanInsentive12 <- mean(sewingTeam12$incentive)
meanOvertime12 <- mean(sewingTeam12$over_time)



# Gather the per-team summaries into vectors ordered Team01 .. Team12.
meanOfSewing <- c(
  mean01, mean02, mean03, mean04, mean05, mean06,
  mean07, mean08, mean09, mean10, mean11, mean12
)
meanofWorkers <- c(
  meanWorker01, meanWorker02, meanWorker03, meanWorker04,
  meanWorker05, meanWorker06, meanWorker07, meanWorker08,
  meanWorker09, meanWorker10, meanWorker11, meanWorker12
)
meanTP <- c(
  meanTP01, meanTP02, meanTP03, meanTP04, meanTP05, meanTP06,
  meanTP07, meanTP08, meanTP09, meanTP10, meanTP11, meanTP12
)
meanSMV <- c(
  meanSMV01, meanSMV02, meanSMV03, meanSMV04, meanSMV05, meanSMV06,
  meanSMV07, meanSMV08, meanSMV09, meanSMV10, meanSMV11, meanSMV12
)
meanWIP <- c(
  meanWIP01, meanWIP02, meanWIP03, meanWIP04, meanWIP05, meanWIP06,
  meanWIP07, meanWIP08, meanWIP09, meanWIP10, meanWIP11, meanWIP12
)
meanInsentive <- c(
  meanInsentive01, meanInsentive02, meanInsentive03, meanInsentive04,
  meanInsentive05, meanInsentive06, meanInsentive07, meanInsentive08,
  meanInsentive09, meanInsentive10, meanInsentive11, meanInsentive12
)
meanOvertime <- c(
  meanOvertime01, meanOvertime02, meanOvertime03, meanOvertime04,
  meanOvertime05, meanOvertime06, meanOvertime07, meanOvertime08,
  meanOvertime09, meanOvertime10, meanOvertime11, meanOvertime12
)

# Investigate the relationship between each team-level variable and
# team-level mean productivity.
plot(meanOfSewing)

hist(meanOfSewing)

plot(meanofWorkers, meanOfSewing,
  main = "Mean productivity vs Mean Number of workers"
)

plot(meanTP, meanOfSewing,
  main = "Mean productivity vs Mean Target Productivity"
)

plot(meanSMV, meanOfSewing, main = "Mean productivity vs Mean SMV")

plot(meanWIP, meanOfSewing, main = "Mean productivity vs Mean WIP")

plot(meanInsentive, meanOfSewing,
  main = "Mean productivity vs Mean Incentive"
)

plot(meanOvertime, meanOfSewing,
  main = "Mean productivity vs Mean Overtime"
)

# Explanation behind these plots:

# The goal of these plots is to see why some teams perform better than others.
# Is it because they have more people on their team, or because they are paid more?
# So we took the mean of several variables by team and plotted them against the mean of each team's productivity.
# This showed us which variables, on average, affected the productivity of the sewing teams and
# which variables didn't.






# Box plots of actual productivity, one panel per sewing team, laid out on a
# 3 x 4 grid. The previous graphics layout is saved and restored afterwards so
# that later base-graphics plots are not drawn into a leftover grid cell.
sewing_teams <- list(
  sewingTeam01, sewingTeam02, sewingTeam03, sewingTeam04,
  sewingTeam05, sewingTeam06, sewingTeam07, sewingTeam08,
  sewingTeam09, sewingTeam10, sewingTeam11, sewingTeam12
)
old_par <- par(mfrow = c(3, 4))
for (team_idx in seq_along(sewing_teams)) {
  boxplot(
    sewing_teams[[team_idx]]$actual_productivity,
    main = "Productivity Index",
    # Zero-padded team number reproduces the labels "Sewing Team01" .. "Sewing Team12".
    xlab = sprintf("Sewing Team%02d", team_idx)
  )
}
par(old_par)

Data Cleaning: ANOVA

# One-way ANOVA of actual productivity on department, followed by the
# diagnostic plots for the fitted model.
fit.department <- aov(
  actual_productivity ~ department,
  data = data_org
)
summary(fit.department)
##               Df Sum Sq Mean Sq F value  Pr(>F)   
## department     1   0.28 0.27958   9.246 0.00241 **
## Residuals   1195  36.13 0.03024                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
autoplot(fit.department)

Apparently the normality assumption is not met. So we can’t use ANOVA here. The alternative test we consider is the Kruskal-Wallis H Test.

# Overlay semi-transparent histograms of actual productivity for the two
# departments (blue = sewing, red = finishing) on a common x-axis so their
# shapes can be compared.
p1 <- hist(sewing$actual_productivity, plot = FALSE)
p2 <- hist(finishing$actual_productivity, plot = FALSE)
# The stray empty argument after p1 was removed and the misspelled
# "Producticity" in the axis label and title was corrected.
plot(
  p1,
  xlab = "Actual Productivity",
  main = "Histograms of Actual Productivity by Departments",
  col = rgb(0, 0, 1, 1 / 4),
  xlim = c(0, 1.5)
)
# TRUE instead of T: T is an ordinary variable and can be reassigned.
plot(p2, col = rgb(1, 0, 0, 1 / 4), xlim = c(0, 1.5), add = TRUE)

To satisfy the assumption of the Kruskal-Wallis test, the productivity distributions of the two departments must have the same shape. The histograms above show that they do.

# Rank-based alternative to the ANOVA above: does actual productivity differ
# between departments?
kruskal.test(
  actual_productivity ~ department,
  data = data_org
)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  actual_productivity by department
## Kruskal-Wallis chi-squared = 27.288, df = 1, p-value = 1.753e-07

Kruskal-Wallis test for outperforming teams

# Treat the team identifier as a label rather than a number, then test whether
# actual productivity differs across the sewing teams.
sewing[["team"]] <- as.character(sewing[["team"]])
kruskal.test(
  actual_productivity ~ team,
  data = sewing
)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  actual_productivity by team
## Kruskal-Wallis chi-squared = 101.03, df = 11, p-value < 2.2e-16
# Pairwise Dunn comparisons between teams with Bonferroni-adjusted p-values.
test_result <- dunnTest(actual_productivity ~ team, data = sewing, method = "bonferroni")
## Warning: team was coerced to a factor.
test_result
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Bonferroni method.
##    Comparison           Z      P.unadj        P.adj
## 1      1 - 10  4.38581434 1.155527e-05 7.626475e-04
## 2      1 - 11  6.25321157 4.020968e-10 2.653839e-08
## 3     10 - 11  1.83837959 6.600649e-02 1.000000e+00
## 4      1 - 12  1.60840826 1.077458e-01 1.000000e+00
## 5     10 - 12 -2.82746928 4.691752e-03 3.096556e-01
## 6     11 - 12 -4.70659942 2.518832e-06 1.662429e-04
## 7       1 - 2  2.74808085 5.994522e-03 3.956385e-01
## 8      10 - 2 -1.64502855 9.996392e-02 1.000000e+00
## 9      11 - 2 -3.49752882 4.695900e-04 3.099294e-02
## 10     12 - 2  1.16832005 2.426777e-01 1.000000e+00
## 11      1 - 3  2.34301290 1.912872e-02 1.000000e+00
## 12     10 - 3 -2.07092252 3.836604e-02 1.000000e+00
## 13     11 - 3 -3.93534926 8.307579e-05 5.483002e-03
## 14     12 - 3  0.75109332 4.525965e-01 1.000000e+00
## 15      2 - 3 -0.41875715 6.753936e-01 1.000000e+00
## 16      1 - 4  3.45599819 5.482590e-04 3.618509e-02
## 17     10 - 4 -0.97150793 3.312954e-01 1.000000e+00
## 18     11 - 4 -2.83442850 4.590773e-03 3.029910e-01
## 19     12 - 4  1.87217092 6.118296e-02 1.000000e+00
## 20      2 - 4  0.68764129 4.916787e-01 1.000000e+00
## 21      3 - 4  1.11305970 2.656828e-01 1.000000e+00
## 22      1 - 5  7.44883491 9.416812e-14 6.215096e-12
## 23     10 - 5  3.05784002 2.229386e-03 1.471394e-01
## 24     11 - 5  1.23782560 2.157807e-01 1.000000e+00
## 25     12 - 5  5.92426818 3.136912e-09 2.070362e-07
## 26      2 - 5  4.71000539 2.477102e-06 1.634888e-04
## 27      3 - 5  5.15120799 2.588140e-07 1.708172e-05
## 28      4 - 5  4.06011516 4.904852e-05 3.237202e-03
## 29      1 - 6  4.30731410 1.652489e-05 1.090643e-03
## 30     10 - 6 -0.11633471 9.073873e-01 1.000000e+00
## 31     11 - 6 -1.97178637 4.863400e-02 1.000000e+00
## 32     12 - 6  2.73481305 6.241571e-03 4.119437e-01
## 33      2 - 6  1.54281451 1.228758e-01 1.000000e+00
## 34      3 - 6  1.97200741 4.860876e-02 1.000000e+00
## 35      4 - 6  0.86264213 3.883343e-01 1.000000e+00
## 36      5 - 6 -3.20116745 1.368720e-03 9.033550e-02
## 37      1 - 7  5.79245499 6.936492e-09 4.578085e-07
## 38     10 - 7  1.45195727 1.465135e-01 1.000000e+00
## 39     11 - 7 -0.35741589 7.207805e-01 1.000000e+00
## 40     12 - 7  4.26587430 1.991209e-05 1.314198e-03
## 41      2 - 7  3.08223190 2.054547e-03 1.356001e-01
## 42      3 - 7  3.51033277 4.475462e-04 2.953805e-02
## 43      4 - 7  2.42684177 1.523090e-02 1.000000e+00
## 44      5 - 7 -1.57203638 1.159421e-01 1.000000e+00
## 45      6 - 7  1.57946885 1.142286e-01 1.000000e+00
## 46      1 - 8  5.75008114 8.920063e-09 5.887241e-07
## 47     10 - 8  1.38965360 1.646341e-01 1.000000e+00
## 48     11 - 8 -0.42861222 6.682054e-01 1.000000e+00
## 49     12 - 8  4.21619108 2.484632e-05 1.639857e-03
## 50      2 - 8  3.02738708 2.466779e-03 1.628074e-01
## 51      3 - 8  3.45728825 5.456409e-04 3.601230e-02
## 52      4 - 8  2.36860115 1.785550e-02 1.000000e+00
## 53      5 - 8 -1.64853377 9.924319e-02 1.000000e+00
## 54      6 - 8  1.51728524 1.291947e-01 1.000000e+00
## 55      7 - 8 -0.06833371 9.455200e-01 1.000000e+00
## 56      1 - 9  3.63854406 2.741837e-04 1.809613e-02
## 57     10 - 9 -0.76957052 4.415547e-01 1.000000e+00
## 58     11 - 9 -2.62272825 8.722882e-03 5.757102e-01
## 59     12 - 9  2.06371433 3.904481e-02 1.000000e+00
## 60      2 - 9  0.88259485 3.774552e-01 1.000000e+00
## 61      3 - 9  1.30704722 1.911967e-01 1.000000e+00
## 62      4 - 9  0.19956131 8.418237e-01 1.000000e+00
## 63      5 - 9 -3.84416077 1.209657e-04 7.983735e-03
## 64      6 - 9 -0.65938640 5.096477e-01 1.000000e+00
## 65      7 - 9 -2.22075245 2.636773e-02 1.000000e+00
## 66      8 - 9 -2.16175709 3.063690e-02 1.000000e+00
# library(PMCMRplus)
# result <- PMCMRplus::kwManyOneDunnTest(x = sewing$actual_productivity, g = sewing$team, data = sewing, method = "bonferroni")
# result
# sewing$team <- as.integer(sewing$team)
# library(emmeans)
# fit.sewing <- aov(actual_productivity^3 ~ team, data=sewing)
# autoplot(fit.sewing)
# sewing.mc <- emmeans(fit.sewing, "team",data=sewing)
# contrast(sewing.mc, "trt.vs.ctrl", reference = "1")

Linear Model with Potential Transformation

library(ggfortify)
# Linear model for the cubed response on all numeric predictors, followed by
# the diagnostic plots for the fit.
sewing.fit <- lm(
  actual_productivity^3 ~ targeted_productivity + smv + wip + over_time +
    incentive + idle_time + idle_men + no_of_style_change + no_of_workers,
  data = sewing
)
autoplot(sewing.fit)

PCA for outlier detection

library(stats)
#sewing_std <- scale(sewingTeam01[,c(6:11,14)])
#threshold <- 20
# Empty Date vector kept as a placeholder for Team01 outlier dates.
outlier_dates_01 <- as.Date(character(0), format = "%Y-%m-%d")
# PCA on the centred and scaled columns 6:10 and 13:14 of the Team01 data.
sewing_pca <- prcomp(
  sewingTeam01[, c(6:10, 13:14)],
  center = TRUE,
  scale. = TRUE
)
# Mahalanobis distance of every observation, computed on the PCA scores.
mahalanobis_dist <- mahalanobis(
  sewing_pca$x,
  colMeans(sewing_pca$x),
  cov(sewing_pca$x)
)
# Append the distances as an extra column of the Team01 data.
sewingTeam01 <- cbind(sewingTeam01, mahalanobis_dist)
# Collect the dates of observations whose Mahalanobis distance exceeds a cutoff.
#
# threshold: numeric cutoff; rows with mahalanobis_dist strictly greater than
#            this value are flagged.
# dataset:   data frame with a `mahalanobis_dist` column and a `date` column
#            (Date, or character in "%Y-%m-%d" form).
# Returns:   a Date vector of the flagged rows' dates in row order; length
#            zero when nothing is flagged.
outlier_function <- function(threshold, dataset) {
  # which() drops NA comparisons and returns integer(0) for a zero-row data
  # set. The earlier `for (i in 1:nrow(dataset))` loop iterated over c(1, 0)
  # in that case and failed, and it grew the result one element at a time.
  flagged_rows <- which(dataset$mahalanobis_dist > threshold)
  as.Date(dataset$date[flagged_rows], format = "%Y-%m-%d")
}
# Use the 95th percentile of the Mahalanobis distances as the outlier cutoff.
percentile_95 <- quantile(mahalanobis_dist, probs = 0.95)
# Dates of the Team01 observations above the cutoff ...
sewingTeam01_outlier <- outlier_function(percentile_95, sewingTeam01)
# ... and the Team01 data with every row on one of those dates dropped.
is_outlier_date <- sewingTeam01$date %in% sewingTeam01_outlier
sewingTeam01_outlierrmv <- sewingTeam01[!is_outlier_date, ]
# team_no_outliers <- function(team){
#   outlierrmv <- team[!team$date %in% outlier_function(27,team), ]
#   paste0(names(team), "_rmv") <- oulierrmv
#   return(paste0(names(team), "_rmv"))
# }

In the code above, we try to reduce the dimensionality of the data set using PCA. We then set a threshold for the Mahalanobis distance and flag as an outlier any observation whose Mahalanobis distance is greater than the threshold. We keep adjusting the threshold value until most values are excluded. However, one concern is that our data, as shown in the hypothesis testing, is not normally distributed. Therefore, we should consider each team separately.

# library(ClassDiscovery)
# spca <- SamplePCA(sewing[6:15])
# spca
library(ggfortify)
# ANOVA of actual productivity on all numeric predictors for Team01,
# with diagnostic plots and the ANOVA table.
testfit <- aov(
  actual_productivity ~ targeted_productivity + smv + wip + over_time +
    incentive + idle_time + idle_men + no_of_style_change + no_of_workers,
  data = sewingTeam01
)
autoplot(testfit)

summary(testfit)
##                       Df Sum Sq Mean Sq F value   Pr(>F)    
## targeted_productivity  1 0.6844  0.6844 287.546  < 2e-16 ***
## smv                    1 0.1896  0.1896  79.657 9.14e-12 ***
## wip                    1 0.0140  0.0140   5.881   0.0191 *  
## over_time              1 0.0058  0.0058   2.445   0.1245    
## incentive              1 0.3163  0.3163 132.900 1.97e-15 ***
## no_of_style_change     1 0.0155  0.0155   6.519   0.0139 *  
## no_of_workers          1 0.0142  0.0142   5.978   0.0182 *  
## Residuals             48 0.1142  0.0024                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hand-picked dates to drop from the Team01 data before refitting.
# Fixed: the first literal used to contain leading spaces and a line break
# ("   \n2015-02-25"), so that date could never match. The dates are now real
# Date values, so the comparison no longer depends on how %in% coerces a Date
# column against character strings.
outliers_to_exclude <- as.Date(c("2015-02-25", "2015-02-24", "2015-01-18"))
# as.Date() is a no-op for a Date column and parses "YYYY-MM-DD" strings.
# NOTE(review): assumes sewingTeam01$date is Date or ISO-formatted text - confirm.
sewingTeam01_outlierrmv <- sewingTeam01[
  !as.Date(sewingTeam01$date) %in% outliers_to_exclude,
]
# Refit the same ANOVA on the reduced data and redraw the diagnostics.
testfit_rmv <- aov(
  actual_productivity ~ targeted_productivity + smv + wip + over_time +
    incentive + idle_time + idle_men + no_of_style_change + no_of_workers,
  data = sewingTeam01_outlierrmv
)
autoplot(testfit_rmv)

Testing Model via Fixed Training Set and Test Set

We first fix the training set and the test set to be 80% and 20% of the dataset. We performed model selection and model validation accordingly.

# Reproducible 80/20 split of the Team01 data into training and test rows.
set.seed(123)
sample_indices <- sample(
  nrow(sewingTeam01),
  size = 0.8 * nrow(sewingTeam01)
)
train.data <- sewingTeam01[sample_indices, ]
# Negative indexing keeps every row that was not drawn for training.
test.data <- sewingTeam01[-sample_indices, ]

Linear Model with BIC

# Full linear model on the training split; the summary is the baseline for
# the subset selection that follows.
ln.fit <- lm(
  actual_productivity ~ targeted_productivity + smv + wip + over_time +
    incentive + idle_time + idle_men + no_of_style_change + no_of_workers,
  data = train.data
)
summary(ln.fit)
## 
## Call:
## lm(formula = actual_productivity ~ targeted_productivity + smv + 
##     wip + over_time + incentive + idle_time + idle_men + no_of_style_change + 
##     no_of_workers, data = train.data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.143980 -0.011009  0.001305  0.010829  0.160442 
## 
## Coefficients: (2 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3.486e-01  2.774e-01   1.257   0.2169    
## targeted_productivity  1.753e-01  1.979e-01   0.886   0.3816    
## smv                   -4.992e-03  1.866e-03  -2.676   0.0112 *  
## wip                   -7.321e-07  3.563e-06  -0.205   0.8384    
## over_time             -1.333e-06  3.534e-06  -0.377   0.7082    
## incentive              3.555e-03  5.353e-04   6.641 9.75e-08 ***
## idle_time                     NA         NA      NA       NA    
## idle_men                      NA         NA      NA       NA    
## no_of_style_change    -9.230e-02  4.648e-02  -1.986   0.0547 .  
## no_of_workers          4.052e-03  3.987e-03   1.016   0.3163    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05248 on 36 degrees of freedom
## Multiple R-squared:  0.9042, Adjusted R-squared:  0.8856 
## F-statistic: 48.54 on 7 and 36 DF,  p-value: < 2.2e-16
library("leaps")
library(gridExtra)
# Best-subset search on the training split: up to 4 candidate models per
# subset size, with at most 9 predictors.
prod.gsub <- regsubsets(
  actual_productivity ~ targeted_productivity + smv + wip + over_time +
    incentive + idle_time + idle_men + no_of_style_change + no_of_workers,
  data = train.data,
  nbest = 4,
  nvmax = 9
)
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 2 linear dependencies found
## Reordering variables and trying again:
# One row per candidate model, with its adjusted R-squared and BIC.
stats <- summary(prod.gsub)
gsub.df <- data.frame(
  Model.Number = seq_along(stats$adjr2),
  Adjusted.R2 = stats$adjr2,
  BIC = stats$bic
)
# Layers shared by both diagnostic plots: line, red points, minimal theme.
shared_layers <- list(
  geom_line(),
  geom_point(color = "red", size = 2),
  theme_minimal()
)
p1 <- ggplot(gsub.df, aes(x = Model.Number, y = Adjusted.R2)) +
  shared_layers +
  ylab("Adjusted R-squared") +
  xlab("Model Number")
p2 <- ggplot(gsub.df, aes(x = Model.Number, y = BIC)) +
  shared_layers +
  ylab("BIC") +
  xlab("Model Number")
# Stack the two plots vertically.
grid.arrange(p1, p2, nrow = 2)

# Candidate model with the highest adjusted R-squared: coefficients and score.
best_adjr2_id <- which.max(gsub.df$Adjusted.R2)
coef(prod.gsub, best_adjr2_id)
##  (Intercept)          smv    incentive    idle_time 
##  0.517009582 -0.002033477  0.004991662  0.000000000
max(gsub.df$Adjusted.R2)
## [1] 0.8929808
# Candidate model with the lowest BIC: coefficients and its adjusted R-squared.
best_bic_id <- which.min(gsub.df$BIC)
coef(prod.gsub, best_bic_id)
##  (Intercept)          smv    incentive    idle_time 
##  0.517009582 -0.002033477  0.004991662  0.000000000
gsub.df$Adjusted.R2[best_bic_id]
## [1] 0.8929808

Lasso Model

# Plain data.frame copies of the two splits.
training_data_frame <- as.data.frame(x = train.data)
test_data_frame <- as.data.frame(x = test.data)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:reshape':
## 
##     expand
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-8
library(caret)
## Loading required package: lattice
# Tune a lasso regression on the training split (train.data) with caret.
set.seed(123)  # for reproducibility
# Candidate lambda values for the tuning grid (despite the name, this is the
# whole grid, not the selected value).
best_lambda <- c(0.01, 0.02, 0.04, 0.06, 0.1, 0.5, 0.8, 1)
# Define the formula: actual productivity on the nine numeric predictors
formula <- as.formula("actual_productivity ~ targeted_productivity + smv + wip + over_time + incentive + idle_time + idle_men + no_of_style_change + no_of_workers")

# Fit glmnet once per lambda candidate and let caret pick the best one
lasso_model <- train(
  formula,
  data = train.data,
  method = "glmnet",
  trControl = trainControl(method = "LOOCV"),  # leave-one-out cross-validation (not 5-fold)
 tuneGrid = expand.grid(alpha = 1, lambda = best_lambda )  # alpha fixed at 1; one row per lambda candidate
  # alpha = 1,                                    
 # lambda =  
)

# Display the resampling results for every lambda and the selected value
print(lasso_model)
## glmnet 
## 
## 44 samples
##  9 predictor
## 
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation 
## Summary of sample sizes: 43, 43, 43, 43, 43, 43, ... 
## Resampling results across tuning parameters:
## 
##   lambda  RMSE        Rsquared   MAE       
##   0.01    0.08312116  0.7065998  0.04020827
##   0.02    0.08013863  0.7359118  0.04017591
##   0.04    0.08497195  0.7575159  0.04279596
##   0.06    0.09643884  0.7400634  0.04976503
##   0.10    0.12772484  0.6265600  0.07300708
##   0.50    0.15693330  1.0000000  0.09872597
##   0.80    0.15693330  1.0000000  0.09872597
##   1.00    0.15693330  1.0000000  0.09872597
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.02.

Model Validation

# Helper metrics for model validation: RMSE, MAE and R-squared.
# rmse: root-mean-square error between observed and predicted values.
rmse <- function(actual, predicted) {
  squared_error <- (actual - predicted)^2
  sqrt(mean(squared_error))
}

# mae: mean absolute error between observed and predicted values.
mae <- function(actual, predicted) {
  absolute_error <- abs(actual - predicted)
  mean(absolute_error)
}

# rsquared: squared Pearson correlation between observed and predicted values.
rsquared <- function(actual, predicted) {
  correlation <- cor(actual, predicted)
  correlation^2
}
# Evaluate the BIC-selected linear model and the tuned lasso on the held-out
# test split, then tabulate RMSE, MAE and R-squared for both.

# Named coefficient vector of the candidate model with the lowest BIC.
best_model_index <- coef(prod.gsub, which.min(gsub.df$BIC))
# Prediction = intercept + sum(coefficient * predictor). The coefficients
# already carry their sign, so every term is added. The earlier version
# subtracted the smv term, which flipped the sign of its coefficient and
# distorted the linear model's test error. Predictors are looked up by name
# so this follows whichever model BIC selects.
# NOTE: the knitted results table below this chunk predates this fix;
# re-knit the document to refresh it.
selected_terms <- setdiff(names(best_model_index), "(Intercept)")
linear.predictions <- best_model_index[["(Intercept)"]] +
  as.vector(
    as.matrix(test.data[, selected_terms, drop = FALSE]) %*%
      best_model_index[selected_terms]
  )
# Lasso predictions at the lambda chosen by caret; columns 6:14 of the test
# data are passed as the predictor matrix.
lasso.predictions <- predict(
  lasso_model$finalModel,
  s = lasso_model$bestTune$lambda,
  newx = as.matrix(test_data_frame[6:14])
)
rmse_linear <- rmse(test.data$actual_productivity, linear.predictions)
mae_linear <- mae(test.data$actual_productivity, linear.predictions)
# print(rmse_linear)
# print(mae_linear)
rmse_lasso <- rmse(test.data$actual_productivity, lasso.predictions)
mae_lasso <- mae(test.data$actual_productivity, lasso.predictions)
# print(rmse_lasso)
# print(mae_lasso)
linear_r_squared <- rsquared(test.data$actual_productivity, linear.predictions)
lasso_r_squared <- rsquared(test.data$actual_productivity, lasso.predictions)
results_table <- data.frame(
  Model = c("Linear with BIC", "Lasso"),
  RMSE = c(rmse_linear, rmse_lasso),
  MAE = c(mae_linear, mae_lasso),
  R_squared = c(linear_r_squared, lasso_r_squared)
)
print(results_table)
##             Model       RMSE        MAE R_squared
## 1 Linear with BIC 0.11410471 0.10339703 0.9137606
## 2           Lasso 0.04760966 0.03514949 0.9184406

Combining two models with one loop of K-fold Cross Validation

In this section, we drop the fixed training/test set restriction. Since our data set is relatively small, the previous method might be biased. Therefore, in this section, we evaluate the linear model and the lasso regression together, measuring both of them under the same folds.

# Fit a plain linear model and a lasso on the full Team01 data under the same
# leave-one-out resampling scheme, with all warnings from the block suppressed.
# `result` receives the value of the last expression, print(lasso_model).
result <- suppressWarnings({



library(glmnet)
library(caret)

set.seed(123)  # for reproducibility


# Define the formula: actual productivity on the nine numeric predictors
formula <- as.formula("actual_productivity ~ targeted_productivity + smv + wip + over_time + incentive + idle_time + idle_men + no_of_style_change + no_of_workers")

train.control <- trainControl(method = "LOOCV",savePredictions = TRUE) # leave-one-out cross-validation (not 5-fold); shared by both models

# Candidate lambda values for the lasso tuning grid (the whole grid, not the selected value)

best_lambda <- c(0.01, 0.02, 0.04, 0.06, 0.1, 0.5, 0.8, 1)

# Create a model specification for Linear regression

linear_model <- train(formula, 
                       data = sewingTeam01, 
                       method = "lm",
                       trControl = train.control)

# Create a model specification for Lasso regression
lasso_model <- train(
  formula,
  data = sewingTeam01,
  method = "glmnet",
  trControl = train.control,  
  tuneGrid = expand.grid(alpha = 1, lambda = best_lambda )  # alpha fixed at 1; one row per lambda candidate
  # alpha = 1,                                    
 # lambda =  
)

# Display the resampling results of both models
print(linear_model)
print(lasso_model)
})
## Linear Regression 
## 
## 56 samples
##  9 predictor
## 
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation 
## Summary of sample sizes: 55, 55, 55, 55, 55, 55, ... 
## Resampling results:
## 
##   RMSE        Rsquared   MAE       
##   0.09484692  0.6564894  0.04920214
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
## glmnet 
## 
## 56 samples
##  9 predictor
## 
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation 
## Summary of sample sizes: 55, 55, 55, 55, 55, 55, ... 
## Resampling results across tuning parameters:
## 
##   lambda  RMSE        Rsquared   MAE       
##   0.01    0.07239002  0.7833178  0.03799983
##   0.02    0.07176767  0.7940895  0.03673019
##   0.04    0.07910671  0.8019159  0.04246462
##   0.06    0.09162941  0.7931627  0.05182894
##   0.10    0.12309080  0.7464612  0.07628347
##   0.50    0.15833040  1.0000000  0.10617440
##   0.80    0.15833040  1.0000000  0.10617440
##   1.00    0.15833040  1.0000000  0.10617440
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.02.
# No warnings will be printed here
print(result)
##  lambda RMSE         Rsquared    MAE         
##  "0.01" "0.07239002" "0.7833178" "0.03799983"
##  "0.02" "0.07176767" "0.7940895" "0.03673019"
##  "0.04" "0.07910671" "0.8019159" "0.04246462"
##  "0.06" "0.09162941" "0.7931627" "0.05182894"
##  "0.10" "0.12309080" "0.7464612" "0.07628347"
##  "0.50" "0.15833040" "1.0000000" "0.10617440"
##  "0.80" "0.15833040" "1.0000000" "0.10617440"
##  "1.00" "0.15833040" "1.0000000" "0.10617440"
# Summary (coefficient table and fit statistics) of the final linear model.
linear_model_summary <- summary(linear_model$finalModel)

# Lasso coefficients at the lambda selected during tuning.
lasso_model_coefficients <- coef(
  lasso_model$finalModel,
  s = lasso_model$bestTune$lambda
)

# Show the linear model's summary under a heading.
cat("Linear Model Coefficients:\n")
## Linear Model Coefficients:
print(linear_model_summary)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15146 -0.02183 -0.00196  0.02210  0.15785 
## 
## Coefficients: (2 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.245e-02  1.831e-01   0.396  0.69411    
## targeted_productivity  3.361e-01  1.293e-01   2.599  0.01240 *  
## smv                   -5.113e-03  1.626e-03  -3.144  0.00286 ** 
## wip                   -6.519e-07  3.252e-06  -0.200  0.84197    
## over_time             -3.377e-06  2.876e-06  -1.174  0.24604    
## incentive              3.339e-03  3.927e-04   8.503 3.87e-11 ***
## idle_time                     NA         NA      NA       NA    
## idle_men                      NA         NA      NA       NA    
## no_of_style_change    -4.179e-02  2.596e-02  -1.610  0.11397    
## no_of_workers          7.307e-03  2.989e-03   2.445  0.01821 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04879 on 48 degrees of freedom
## Multiple R-squared:  0.9156, Adjusted R-squared:  0.9033 
## F-statistic: 74.42 on 7 and 48 DF,  p-value: < 2.2e-16
cat("\nLasso Model Coefficients:\n")
## 
## Lasso Model Coefficients:
print(lasso_model_coefficients)
## 10 x 1 sparse Matrix of class "dgCMatrix"
##                                  s1
## (Intercept)            4.519759e-01
## targeted_productivity  1.352621e-01
## smv                   -1.120817e-05
## wip                    .           
## over_time              .           
## incentive              3.795447e-03
## idle_time              .           
## idle_men               .           
## no_of_style_change    -3.458399e-02
## no_of_workers          .

Model Validation

# Resampled performance of the linear model, as reported by caret.
rmse.linear <- linear_model$results$RMSE
rsquared.linear <- linear_model$results$Rsquared
mae.linear <- linear_model$results$MAE

# Row of the lasso tuning results that corresponds to the selected lambda.
best_lambda_row <- lasso_model$results[
  lasso_model$results$lambda == lasso_model$bestTune$lambda,
]
rmse.lasso <- best_lambda_row$RMSE
rsquared.lasso <- best_lambda_row$Rsquared
mae.lasso <- best_lambda_row$MAE

# Side-by-side comparison of the two models.
results_table_kfold <- data.frame(
  Model = c("Linear", "Lasso"),
  RMSE = c(rmse.linear, rmse.lasso),
  MAE = c(mae.linear, mae.lasso),
  R_squared = c(rsquared.linear, rsquared.lasso)
)
print(results_table_kfold)
##    Model       RMSE        MAE R_squared
## 1 Linear 0.09484692 0.04920214 0.6564894
## 2  Lasso 0.07176767 0.03673019 0.7940895